library(janitor)
library(tidyverse)
library(ggplot2)
library(readr)
library(stringr)
library(readxl)
library(viridisLite)


# Coded Democratic platform statements, keeping only years after 1976.
dem_party_platform <- filter(
  read_csv("Yale/Amelia work/Excel files/Dem_partyplatform_20.1.csv"),
  year > 1976
)

# Major-topic codes grouped into the four topic areas used below.
economic_codes <- c(1, 3:8, 10, 13:15, 17, 18, 21)
cultural_codes <- c(2, 9, 12)
defense_codes <- c(16, 19)
other_codes <- 20


# Topic areas ----
# Flag every platform statement with a 0/1 indicator for each topic area.
#
# * Rows coded -555 on majortopic are dropped first (rows with a missing
#   majortopic are dropped by the same filter, as before).
# * `%in%` is used for the subtopic test so that a missing subtopic counts as
#   "not 2102" instead of producing NA, which would turn the yearly mean of
#   `other` into NA.
# * A statement sent to "other" (major topic in other_codes, or subtopic
#   2102) is excluded from the remaining areas, so the indicators are mutually
#   exclusive and the yearly shares cannot add up to more than 100%.
#   NOTE(review): subtopic 2102 presumably sits under major topic 21, which is
#   listed in economic_codes -- confirm that it should count only as "other".
dem_party_platform_2 <- dem_party_platform %>%
  filter(majortopic != -555) %>%
  mutate(
    in_other = majortopic %in% other_codes | subtopic %in% 2102,
    economic = as.numeric(majortopic %in% economic_codes & !in_other),
    cultural = as.numeric(majortopic %in% cultural_codes & !in_other),
    defense = as.numeric(majortopic %in% defense_codes & !in_other),
    other = as.numeric(in_other),
    in_other = NULL # helper column only; not part of the output
  )

# Yearly share (in percent) of platform statements falling in each topic area,
# reshaped to one row per year and topic. `topic_factor` fixes the topic order.
dem_party_topics_2 <- dem_party_platform_2 %>%
  group_by(year) %>%
  summarise(
    economic = mean(economic),
    `cultural/identity` = mean(cultural),
    `defense/international` = mean(defense),
    other = mean(other)
  ) %>%
  pivot_longer(cols = -year, names_to = "topic", values_to = "percent") %>%
  mutate(
    percent = 100 * percent,
    topic_factor = factor(
      topic,
      levels = c(
        "economic", "cultural/identity", "defense/international", "other"
      )
    )
  )

# Plot ----
# Line chart of each topic area's share of the platform by year.
platform_plot <- ggplot(
  dem_party_topics_2,
  aes(x = year, y = percent, col = topic_factor)
) +
  geom_point() +
  geom_line() +
  theme_classic() +
  scale_x_continuous(breaks = seq(1980, 2020, by = 4)) +
  scale_y_continuous(breaks = seq(10, 60, by = 10)) +
  # begin/end are numeric positions in [0, 1] on the palette; they were
  # previously passed as strings and only worked through implicit coercion.
  scale_color_viridis_d(begin = 0.2, end = 0.9, option = "G") +
  # scale_color_grey(start = 0.2, end = 0.8) +
  theme(legend.position = "bottom") +
  labs(x = "Year", y = "Percent of platform", color = "Topic")

platform_plot

# Pass the plot explicitly so the saved file does not depend on whichever plot
# happened to be created last.
ggsave(
  "platform percent revised.png",
  plot = platform_plot,
  height = 8,
  width = 8
)

# Save ----
# Write the topic shares both as an R data file and as a CSV.
save(list = "dem_party_topics_2", file = "dem_platform_topics.rdata")

write.csv(x = dem_party_topics_2, file = "dem platform topics.csv")



# DATA FROM CITYLAB CDI ----
# Download the two CityLab congressional-district tables, store each as a
# local .xlsx file, then read the .xlsx files back in for the analysis.
#
# write_xlsx() comes from the writexl package, which this script never
# attaches, so it is called with an explicit namespace; the bare call fails
# with "could not find function" in a fresh session.

# 2000 districts, 2000-2008 presidential
# NOTE(review): the URL path says "2010-districts" while the data are treated
# as the 2000 district set below -- confirm this is the intended file.
urlfile <- "https://raw.githubusercontent.com/theatlantic/citylab-data/master/citylab-congress/2010-districts/2010_districts.csv"
cdi_2000 <- read_csv(url(urlfile))

writexl::write_xlsx(cdi_2000, "cdi_2000.xlsx")

# 2010 districts, 2012-2016 presidential
urlfile <- "https://raw.githubusercontent.com/theatlantic/citylab-data/master/citylab-congress/citylab_cdi_extended.csv"
cdi_2010 <- read_csv(url(urlfile))

# NOTE(review): this overwrites cdi_2010_2.xlsx on every run; if that file is
# ever edited by hand, those edits are lost -- confirm that is acceptable.
writexl::write_xlsx(cdi_2010, "cdi_2010_2.xlsx")

# PREP FOR ANALYSIS ----
cdi_2000 <- read_excel("cdi_2000.xlsx")
cdi_2010_updated <- read_excel("cdi_2010_2.xlsx")

# 2010 districts ----
# Classify each district's 2018 midterm outcome from the party holding the
# seat before the election and the party that won it:
#   "R" / "D"  seat stayed with the same party
#   "D+1"      seat moved from R to D
#   "D-1"      seat moved from D to R
# Any other combination (including missing values) is left as NA.
cdi_2010_updated2 <- cdi_2010_updated %>%
  mutate(
    `2018` = case_when(
      Pre_2018_party == "R" & winner_party_2018 == "R" ~ "R",
      Pre_2018_party == "D" & winner_party_2018 == "D" ~ "D",
      Pre_2018_party == "R" & winner_party_2018 == "D" ~ "D+1",
      Pre_2018_party == "D" & winner_party_2018 == "R" ~ "D-1"
    )
  )

# Rename the presidential vote columns to their election year and rescale the
# 2016 and 2012 values by 100.
# NOTE(review): `2020` (Biden2020) is not rescaled here; presumably it is
# already stored on the 0-100 scale -- confirm against the data.
cdi_2010_updated4 <- cdi_2010_updated2 %>%
  rename(`2020` = Biden2020, `2016` = Clinton16, `2012` = Obama12) %>%
  mutate(`2016` = `2016` * 100, `2012` = `2012` * 100)

cdi_2010_select <- cdi_2010_updated4 %>%
  select(CD, Cluster, `2020`, `2016`, `2012`, `2018`)

# One row per district and presidential election.
cdi_2010_long <- cdi_2010_select %>%
  pivot_longer(
    cols = c(`2020`, `2016`, `2012`),
    names_to = "Presidential_election",
    values_to = "Democratic_vote"
  )

# Move the midterm year into its own column alongside the outcome code.
cdi_2010_long2 <- cdi_2010_long %>%
  pivot_longer(
    cols = `2018`,
    names_to = "Midterm_election",
    values_to = "Partisan_change"
  )

# View() needs an interactive session; skip it when the script is run in
# batch mode so it cannot abort the run.
if (interactive()) {
  View(cdi_2010_long2)
}



# 2000 districts ----
# Code the 2010 midterm outcome per district: "R"/"D" when Party2010 and
# winner2010 agree, "D+1" for an R-to-D change, "D-1" for a D-to-R change.
cdi_2000_updated2 <- mutate(
  cdi_2000,
  `2010` = case_when(
    Party2010 == "R" & winner2010 == "R" ~ "R",
    Party2010 == "D" & winner2010 == "D" ~ "D",
    Party2010 == "R" & winner2010 == "D" ~ "D+1",
    Party2010 == "D" & winner2010 == "R" ~ "D-1"
  )
)

# Label the presidential vote columns by election year, then multiply by 100.
cdi_2000_updated4 <- rename(
  cdi_2000_updated2,
  `2008` = obama08,
  `2004` = kerry04
)
cdi_2000_updated4 <- mutate(
  cdi_2000_updated4,
  across(c(`2008`, `2004`), ~ .x * 100)
)

cdi_2000_select <- select(
  cdi_2000_updated4,
  CD, Cluster, `2008`, `2004`, `2010`
)

# Long format: one row per district and presidential election.
cdi_2000_long <- pivot_longer(
  cdi_2000_select,
  cols = c(`2008`, `2004`),
  names_to = "Presidential_election",
  values_to = "Democratic_vote"
)

# Midterm year becomes a column of its own next to the outcome code.
cdi_2000_long2 <- pivot_longer(
  cdi_2000_long,
  cols = `2010`,
  names_to = "Midterm_election",
  values_to = "Partisan_change"
)



# Bind datasets and summarize ----

# Cluster label -> code "1".."6" (stored as text). Defined once and reused for
# both summaries below; labels not listed here map to NA.
cluster_codes <- c(
  "Pure urban" = "1",
  "Urban-suburban mix" = "2",
  "Dense suburban" = "3",
  "Sparse suburban" = "4",
  "Rural-suburban mix" = "5",
  "Pure rural" = "6"
)

# bind_rows() matches columns by name rather than by position.
cdi <- bind_rows(cdi_2000_long2, cdi_2010_long2)

# View() needs an interactive session; skip it in batch runs.
if (interactive()) {
  View(cdi)
}

# Mean Democratic vote per cluster and presidential election.
cdi_stats <- cdi %>%
  group_by(Cluster, Presidential_election) %>%
  summarize(
    Average_Democratic_vote = mean(Democratic_vote),
    .groups = "drop"
  ) %>%
  mutate(cluster = unname(cluster_codes[as.character(Cluster)]))

cdi_stats_revised <- cdi_stats %>%
  filter(Presidential_election %in% c("2004", "2020"))

# Mean Democratic vote per cluster and midterm election.
cdi_stats2 <- cdi %>%
  group_by(Cluster, Midterm_election) %>%
  summarize(
    Average_Democratic_vote = mean(Democratic_vote),
    .groups = "drop"
  )

# Districts that changed party at a midterm. `cdi` holds one row per district
# and presidential election, so distinct() keeps a single row per district
# and midterm before counting.
cdi_midterms <- cdi %>%
  filter(Partisan_change %in% c("D+1", "D-1")) %>%
  distinct(CD, Midterm_election, .keep_all = TRUE) %>%
  mutate(cluster = unname(cluster_codes[as.character(Cluster)]))

# Number of flipped seats per cluster, midterm and direction.
cdi_midterms_updated <- cdi_midterms %>%
  group_by(Cluster, cluster, Midterm_election, Partisan_change) %>%
  summarize(number_change = n(), .groups = "drop")

# One column per direction. values_fill zero-fills only the count cells; the
# previous blanket `[is.na(.)] <- 0` also rewrote missing Cluster/cluster
# labels.
cdi_midterms_updated2 <- cdi_midterms_updated %>%
  pivot_wider(
    id_cols = c(Cluster, cluster, Midterm_election),
    names_from = Partisan_change,
    values_from = number_change,
    values_fill = 0L
  )

# If no seat flipped in one direction, pivot_wider() creates no column for it;
# add it as all zeros so the net-change arithmetic below cannot fail.
for (flip in c("D+1", "D-1")) {
  if (!flip %in% names(cdi_midterms_updated2)) {
    cdi_midterms_updated2[[flip]] <- rep(0L, nrow(cdi_midterms_updated2))
  }
}

# Net Democratic gain: seats gained minus seats lost (row-wise, so no grouping
# is needed).
cdi_midterms_updated3 <- cdi_midterms_updated2 %>%
  mutate(net_change = `D+1` - `D-1`)


# PLOTTING CDI, revised ----
# Dodged bar chart of net Democratic seats flipped per cluster, with one bar
# per midterm election.
density_plot <- ggplot(
  data = cdi_midterms_updated3,
  mapping = aes(x = cluster, y = net_change, fill = Midterm_election)
) +
  geom_col(position = "dodge") +
  # begin/end are numeric positions in [0, 1] on the palette; they were
  # previously passed as strings and only worked through implicit coercion.
  scale_fill_viridis_d(begin = 0.2, end = 0.9, option = "G") +
  theme_classic() +
  # `cluster` holds the text codes "1".."6", so the breaks are given as text
  # too rather than relying on number-to-string matching.
  scale_x_discrete(
    breaks = as.character(1:6),
    labels = c(
      "Pure urban", "Urban-suburban mix", "Dense suburban",
      "Sparse suburban", "Rural-suburban mix", "Pure rural"
    )
  ) +
  labs(
    x = "Congressional district density",
    y = "Net Democratic seats flipped",
    fill = "Midterm election"
  ) +
  theme(legend.position = "bottom")

density_plot

# Save ----
# Pass the plot explicitly so the saved file does not depend on whichever plot
# happened to be created last.
ggsave(
  "congressional district density.png",
  plot = density_plot,
  height = 8,
  width = 8
)

# Write the net seat-change table both as an R data file and as a CSV.
save(
  list = "cdi_midterms_updated3",
  file = "congressional district density.rdata"
)

write.csv(
  x = cdi_midterms_updated3,
  file = "congressional district density.csv"
)
